###############################################################################
# AZ cleaning
#
# Written May 8, 2024
#   sbaltz at mit
###############################################################################

import pandas as pd

# Input and output paths are relative to the directory the script is run from.
RAW_FILE_NAME = '../transcribed/2020_general_pima_hand_countpdf.csv'
SAVE_FILE_NAME = '../ready/pima_cleaned.csv'
COUNTY_NAME = 'PIMA'

# Column order of the cleaned output file. Note that the counting method
# ("MANUAL") is stored under the column name 'type'.
OUTPUT_COLUMNS = [
    'state',
    'county',
    'precinct',
    'batch',
    'office',
    'candidate',
    'original',
    'audited',
    'difference',
    'mode',
    'type',
]

ct = pd.read_csv(RAW_FILE_NAME)

# The raw file interleaves header rows with vote-count rows. Header rows set
# the context (precinct or batch, vote mode, office) that applies to every
# count row after them, so the file has to be walked in order. None means
# "no such header has been seen yet".
precinct = batch = mode = office = None

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame with pd.concat per row recopies it every time.
rows = []
for i, row in enumerate(ct.itertuples(index=False)):
    # These three checks are deliberately independent `if`s (not `elif`):
    # each looks at a different column of the same row.
    if 'Precinct Counted:' in str(row.office):
        precinct = str(row.precinct)
        batch = ''
        # Unclear what the mode of the precinct votes is -- may be any mode
        mode = ''
    if 'Batch Counted:' in str(row.audited):
        precinct = ''
        batch = str(row.original)
        # All batches are early vote batches
        mode = 'EARLY'
    if 'Race:' in str(row.office_sig):
        office = str(row.office)

    # Only rows with both counts filled in can be vote-count rows.
    if str(row.original) == 'nan' or str(row.audited) == 'nan':
        continue

    try:
        candidate = str(row.candidate)
        # int() raises on non-numeric cells (e.g. header rows whose count
        # columns hold text); such rows are reported and skipped below.
        original = int(row.original)
        audited = int(row.audited)
        # Per-contest total rows are not candidate results.
        if 'Total' in candidate:
            continue
        if precinct is None or office is None:
            raise ValueError(
                "count row appears before any precinct/batch or race header"
            )
    except (ValueError, TypeError, OverflowError) as inst:
        print(f"ROW {i} NOT SAVED: ", inst)
        continue

    rows.append([
        "ARIZONA",
        COUNTY_NAME,
        precinct,
        batch,
        office,
        candidate,
        original,
        audited,
        audited - original,
        mode,
        "MANUAL",
    ])

# Passing `columns=` keeps the frame well-formed even when no row was kept.
cleaned = pd.DataFrame(rows, columns=OUTPUT_COLUMNS)

# Drop rows whose candidate cell was empty in the raw file (str(NaN) == 'nan').
# .copy() so the column assignments below act on an independent frame.
cleaned = cleaned.loc[cleaned['candidate'] != 'nan'].copy()

cleaned['precinct'] = cleaned['precinct'].str.strip()
cleaned['office'] = cleaned['office'].str.strip()

cleaned.to_csv(SAVE_FILE_NAME, index=False)
